# pySpark init

# Host preparation: set the system timezone, report tool versions and
# configure pip before any packages are installed.
sudo timedatectl set-timezone Asia/Shanghai

# Print the Python and pip versions that are on PATH.
python3 --version
pip3 --version

# Use the Aliyun PyPI mirror as the global package index for both the `pip`
# and `pip3` front-ends.
# Alternative mirror (Tsinghua), kept for reference:
#   https://pypi.tuna.tsinghua.edu.cn/simple
for pip_cmd in pip pip3; do
  sudo "$pip_cmd" config set global.index-url https://mirrors.aliyun.com/pypi/simple/
done

# Upgrade pip itself through the python3 module entry point.
# NOTE(review): `--force` is presumably accepted by pip as an abbreviation of
# `--force-reinstall` -- confirm, and consider spelling it out.
sudo python3 -m pip install --upgrade --force pip

# Install the Python dependencies.
#
# pip3 is resolved to an absolute path first and that path is what gets run
# under sudo (presumably because sudo's PATH can differ from the caller's --
# confirm). `command -v` is the portable replacement for `which`; `|| true`
# keeps a missing pip3 from aborting a caller that runs with `set -e`.
pip3path=$(command -v pip3 || true)

if [ -z "$pip3path" ]; then
  # Without this guard an empty path would turn the command below into
  # `sudo install --no-deps ...`, i.e. run the unrelated coreutils `install`.
  printf '%s\n' 'pip3 not found on PATH; skipping Python dependency install' >&2
else
  # --no-deps: install exactly the listed packages without pulling in their
  # transitive dependencies; urllib3 is pinned to 1.26.12.
  sudo "$pip3path" install --no-deps \
    pandas pyathena fsspec boto3 botocore tenacity urllib3==1.26.12 \
    s3transfer smart-open wrapt
fi

# Download the fridge-detection Python code from S3 into /home/hadoop/.
aws s3 cp s3://guzhang-dataset/code/ /home/hadoop/ --recursive

# Scala Spark dependencies: each jar is fetched from S3 into the home
# directory and then copied into the Spark jar directory.
spark_jar_dir=/usr/lib/spark/jars
sudo mkdir -p "$spark_jar_dir"

for jar in \
  fastjson-1.2.78.jar \
  mysql-connector-java-8.0.20.jar \
  druid-1.2.9.jar \
  config-1.4.2.jar \
  spark-sql-datasource-1.0.4.jar \
  hive-hcatalog-core-3.1.3-amzn-2.jar \
  hudi-spark-bundle.jar
do
  aws s3 cp "s3://streaming/flink/libs/$jar" ~/"$jar"
  sudo cp ~/"$jar" "$spark_jar_dir/"
done


# AWS / Hudi S3 environment.
#
# SECURITY: static AWS credentials must not be hardcoded in this script --
# anyone who can read the file or its history obtains them. Any key pair that
# was ever embedded here should be treated as compromised and rotated.
#
# Inputs (optional, from the caller's environment):
#   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
# Exports:
#   AWS_DEFAULT_REGION and the Hudi S3 endpoint variables (always);
#   the AWS_* credentials and their HOODIE_ENV_* copies (only when both
#   credential inputs are present and non-empty).
# NOTE(review): the HOODIE_ENV_* names look like Hudi's convention for passing
# Hadoop properties via the environment ("_DOT_" standing for ".") -- confirm.

# Non-secret settings: region and S3 endpoints.
export AWS_DEFAULT_REGION=cn-northwest-1
export HOODIE_ENV_fs_DOT_s3a_DOT_endpoint=s3.cn-northwest-1.amazonaws.com.cn
export HOODIE_ENV_fs_DOT_s3_DOT_endpoint=s3.cn-northwest-1.amazonaws.com.cn

if [ -n "${AWS_ACCESS_KEY_ID:-}" ] && [ -n "${AWS_SECRET_ACCESS_KEY:-}" ]; then
  export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY
  # Hand the same key pair to both the s3a and the s3 property names.
  export HOODIE_ENV_fs_DOT_s3a_DOT_access_DOT_key="$AWS_ACCESS_KEY_ID"
  export HOODIE_ENV_fs_DOT_s3a_DOT_secret_DOT_key="$AWS_SECRET_ACCESS_KEY"
  export HOODIE_ENV_fs_DOT_s3_DOT_awsAccessKeyId="$AWS_ACCESS_KEY_ID"
  export HOODIE_ENV_fs_DOT_s3_DOT_awsSecretAccessKey="$AWS_SECRET_ACCESS_KEY"
else
  # Deliberately non-fatal: only a warning is printed and no static
  # credentials are exported.
  printf '%s\n' 'AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY not set; no static S3 credentials exported' >&2
fi